Animating Question

Which economic event(s), or conditions, are correlated with real estate price fluctuations since the year 2000?

Additional Questions

Tidying Datasets

Tidying Census Population Estimates data

# Population Estimates 2000-2010
pop2000_2010 <- read.csv(("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimate 2000_2010.csv")) |> 
                filter(SEX == 0) |>
                filter(AGE == 999) |>
                select(-SEX,-AGE,-DIVISION,-STATE,-ESTIMATESBASE2000,-CENSUS2010POP) |>
                filter(NAME != 'United States')
glimpse(pop2000_2010)
## Rows: 51
## Columns: 13
## $ REGION          <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, …
## $ NAME            <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2000 <int> 4452173, 627963, 5160586, 2678588, 33987977, 4326921, …
## $ POPESTIMATE2001 <int> 4467634, 633714, 5273477, 2691571, 34479458, 4425687, …
## $ POPESTIMATE2002 <int> 4480089, 642337, 5396255, 2705927, 34871843, 4490406, …
## $ POPESTIMATE2003 <int> 4503491, 648414, 5510364, 2724816, 35253159, 4528732, …
## $ POPESTIMATE2004 <int> 4530729, 659286, 5652404, 2749686, 35574576, 4575013, …
## $ POPESTIMATE2005 <int> 4569805, 666946, 5839077, 2781097, 35827943, 4631888, …
## $ POPESTIMATE2006 <int> 4628981, 675302, 6029141, 2821761, 36021202, 4720423, …
## $ POPESTIMATE2007 <int> 4672840, 680300, 6167681, 2848650, 36250311, 4803868, …
## $ POPESTIMATE2008 <int> 4718206, 687455, 6280362, 2874554, 36604337, 4889730, …
## $ POPESTIMATE2009 <int> 4757938, 698895, 6343154, 2896843, 36961229, 4972195, …
## $ POPESTIMATE2010 <int> 4785298, 713985, 6413737, 2921606, 37349363, 5049071, …
# Population Estimates 2011-2019
pop2011_2019 <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimates 2010_2019.csv") |>
                select(-SUMLEV,-DIVISION,-STATE,-CENSUS2010POP,-ESTIMATESBASE2010,-POPESTIMATE2010) |>
                select(NAME,POPESTIMATE2011:POPESTIMATE2019) |>
                filter(NAME != 'United States') |>
                filter(NAME != 'Northeast Region') |>
                filter(NAME != 'Midwest Region') |>
                filter(NAME != 'West Region') |>
                filter(NAME != 'South Region') |>
                filter(NAME != 'Puerto Rico')
glimpse(pop2011_2019)
## Rows: 51
## Columns: 10
## $ NAME            <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2011 <int> 4799069, 722128, 6472643, 2940667, 37638369, 5121108, …
## $ POPESTIMATE2012 <int> 4815588, 730443, 6554978, 2952164, 37948800, 5192647, …
## $ POPESTIMATE2013 <int> 4830081, 737068, 6632764, 2959400, 38260787, 5269035, …
## $ POPESTIMATE2014 <int> 4841799, 736283, 6730413, 2967392, 38596972, 5350101, …
## $ POPESTIMATE2015 <int> 4852347, 737498, 6829676, 2978048, 38918045, 5450623, …
## $ POPESTIMATE2016 <int> 4863525, 741456, 6941072, 2989918, 39167117, 5539215, …
## $ POPESTIMATE2017 <int> 4874486, 739700, 7044008, 3001345, 39358497, 5611885, …
## $ POPESTIMATE2018 <int> 4887681, 735139, 7158024, 3009733, 39461588, 5691287, …
## $ POPESTIMATE2019 <int> 4903185, 731545, 7278717, 3017804, 39512223, 5758736, …
# Population Estimates 2020-2023
pop2020_2023 <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimate 2020_2023.csv") |>
                select(NAME:POPESTIMATE2023) |>
                filter(NAME != 'United States') |>
                filter(NAME != 'Northeast Region') |>
                filter(NAME != 'Midwest Region') |>
                filter(NAME != 'West Region') |>
                filter(NAME != 'South Region') |>
                filter(NAME != 'Puerto Rico') |>
                filter(NAME != 'New England') |>
                filter(NAME != 'Middle Atlantic') |>
                filter(NAME != 'East North Central') |>
                filter(NAME != 'West North Central') |>
                filter(NAME != 'South Atlantic') |>
                filter(NAME != 'East South Central') |>
                filter(NAME != 'West South Central') |>
                filter(NAME != 'Mountain') |>
                filter(NAME != 'Pacific') |>
                select(-ESTIMATESBASE2020)
glimpse(pop2020_2023)
## Rows: 51
## Columns: 5
## $ NAME            <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2020 <int> 5031864, 732964, 7186683, 3014348, 39503200, 5785219, …
## $ POPESTIMATE2021 <int> 5050380, 734923, 7272487, 3028443, 39145060, 5811596, …
## $ POPESTIMATE2022 <int> 5073903, 733276, 7365684, 3046404, 39040616, 5841039, …
## $ POPESTIMATE2023 <int> 5108468, 733406, 7431344, 3067732, 38965193, 5877610, …
# Population Estimates of all states from 2000-2023
pop_estimate <- left_join(pop2000_2010,pop2011_2019,join_by(NAME==NAME)) |> 
                left_join(pop2020_2023,join_by(NAME==NAME)) |>
                rename('STATE'=NAME)
glimpse(pop_estimate)
## Rows: 51
## Columns: 26
## $ REGION          <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, …
## $ STATE           <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2000 <int> 4452173, 627963, 5160586, 2678588, 33987977, 4326921, …
## $ POPESTIMATE2001 <int> 4467634, 633714, 5273477, 2691571, 34479458, 4425687, …
## $ POPESTIMATE2002 <int> 4480089, 642337, 5396255, 2705927, 34871843, 4490406, …
## $ POPESTIMATE2003 <int> 4503491, 648414, 5510364, 2724816, 35253159, 4528732, …
## $ POPESTIMATE2004 <int> 4530729, 659286, 5652404, 2749686, 35574576, 4575013, …
## $ POPESTIMATE2005 <int> 4569805, 666946, 5839077, 2781097, 35827943, 4631888, …
## $ POPESTIMATE2006 <int> 4628981, 675302, 6029141, 2821761, 36021202, 4720423, …
## $ POPESTIMATE2007 <int> 4672840, 680300, 6167681, 2848650, 36250311, 4803868, …
## $ POPESTIMATE2008 <int> 4718206, 687455, 6280362, 2874554, 36604337, 4889730, …
## $ POPESTIMATE2009 <int> 4757938, 698895, 6343154, 2896843, 36961229, 4972195, …
## $ POPESTIMATE2010 <int> 4785298, 713985, 6413737, 2921606, 37349363, 5049071, …
## $ POPESTIMATE2011 <int> 4799069, 722128, 6472643, 2940667, 37638369, 5121108, …
## $ POPESTIMATE2012 <int> 4815588, 730443, 6554978, 2952164, 37948800, 5192647, …
## $ POPESTIMATE2013 <int> 4830081, 737068, 6632764, 2959400, 38260787, 5269035, …
## $ POPESTIMATE2014 <int> 4841799, 736283, 6730413, 2967392, 38596972, 5350101, …
## $ POPESTIMATE2015 <int> 4852347, 737498, 6829676, 2978048, 38918045, 5450623, …
## $ POPESTIMATE2016 <int> 4863525, 741456, 6941072, 2989918, 39167117, 5539215, …
## $ POPESTIMATE2017 <int> 4874486, 739700, 7044008, 3001345, 39358497, 5611885, …
## $ POPESTIMATE2018 <int> 4887681, 735139, 7158024, 3009733, 39461588, 5691287, …
## $ POPESTIMATE2019 <int> 4903185, 731545, 7278717, 3017804, 39512223, 5758736, …
## $ POPESTIMATE2020 <int> 5031864, 732964, 7186683, 3014348, 39503200, 5785219, …
## $ POPESTIMATE2021 <int> 5050380, 734923, 7272487, 3028443, 39145060, 5811596, …
## $ POPESTIMATE2022 <int> 5073903, 733276, 7365684, 3046404, 39040616, 5841039, …
## $ POPESTIMATE2023 <int> 5108468, 733406, 7431344, 3067732, 38965193, 5877610, …
# Separating Region and State to join other data sets 
region_state <- pop_estimate |>
                select(REGION,STATE)
glimpse(region_state)
## Rows: 51
## Columns: 2
## $ REGION <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, 3, 1, 3, …
## $ STATE  <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
# Population Estimates by Region 
allRegions_PopEstimates <- pop_estimate |>
                           select(-STATE) |>
                           group_by(REGION) |>
                           summarise_all(mean) |>
                           pivot_longer(cols = "POPESTIMATE2000":"POPESTIMATE2023",
                           names_to = c("YearlyPopEstimate"),
                           values_to = "Population")
allRegions_PopEstimates$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_PopEstimates$YearlyPopEstimate)
allRegions_PopEstimates <- allRegions_PopEstimates |> select(-YearlyPopEstimate)
glimpse(allRegions_PopEstimates)
## Rows: 96
## Columns: 3
## $ REGION     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Population <dbl> 5962922, 5990614, 6015991, 6037161, 6047059, 6050137, 60580…
## $ Year       <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "20…
# Convert To CSV
#write.csv(allRegions_PopEstimates, 'allRegions_PopEstimates.csv', row.names=FALSE)

Tidying Zillow Average Home Prices data

# Original Zillow data  
Zillow <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Zillow.csv") |>
          select(-RegionID,-SizeRank,-RegionType,-StateName) 
Zillow[is.na(Zillow)] <- 0
glimpse(Zillow)
## Rows: 51
## Columns: 291
## $ RegionName  <chr> "California", "Texas", "Florida", "New York", "Pennsylvani…
## $ X2000.01.31 <dbl> 190252.33, 110404.23, 105986.02, 150566.75, 96332.77, 1243…
## $ X2000.02.29 <dbl> 190891.82, 110464.23, 106215.60, 151102.92, 96538.81, 1244…
## $ X2000.03.31 <dbl> 191755.30, 110493.33, 106493.89, 151619.07, 96732.44, 1246…
## $ X2000.04.30 <dbl> 193629.64, 110637.75, 107059.58, 152729.15, 97127.03, 1251…
## $ X2000.05.31 <dbl> 195808.29, 110731.28, 107669.62, 153877.70, 97532.38, 1257…
## $ X2000.06.30 <dbl> 198159.04, 110836.15, 108292.06, 155119.60, 97948.96, 1264…
## $ X2000.07.31 <dbl> 200641.60, 110905.06, 108917.46, 156236.06, 98391.34, 1271…
## $ X2000.08.31 <dbl> 203233.53, 111108.94, 109554.40, 157213.33, 98813.64, 1282…
## $ X2000.09.30 <dbl> 205831.47, 111376.30, 110214.58, 158081.72, 99243.93, 1294…
## $ X2000.10.31 <dbl> 208295.16, 111662.78, 110888.53, 159012.53, 99656.59, 1308…
## $ X2000.11.30 <dbl> 210721.43, 111993.36, 111588.88, 160152.84, 100111.84, 131…
## $ X2000.12.31 <dbl> 213096.00, 112360.56, 112311.22, 161404.53, 100582.73, 132…
## $ X2001.01.31 <dbl> 215104.78, 112579.79, 112971.72, 162605.10, 101047.52, 133…
## $ X2001.02.28 <dbl> 217091.04, 112546.66, 113627.92, 163604.28, 101472.50, 134…
## $ X2001.03.31 <dbl> 219326.75, 112327.29, 114313.89, 164505.48, 101899.93, 134…
## $ X2001.04.30 <dbl> 222106.55, 112198.77, 115124.18, 165446.37, 102358.57, 135…
## $ X2001.05.31 <dbl> 224892.10, 112241.92, 116011.70, 166485.49, 102797.49, 136…
## $ X2001.06.30 <dbl> 227422.54, 112372.77, 116938.13, 167632.43, 103244.37, 137…
## $ X2001.07.31 <dbl> 229791.61, 112526.48, 117934.07, 168836.55, 103725.17, 137…
## $ X2001.08.31 <dbl> 232074.24, 112664.54, 118989.69, 170100.51, 104254.80, 139…
## $ X2001.09.30 <dbl> 234289.75, 112809.75, 120084.07, 171336.72, 104790.87, 140…
## $ X2001.10.31 <dbl> 236361.29, 112961.63, 121181.45, 172637.88, 105283.46, 141…
## $ X2001.11.30 <dbl> 238307.38, 113072.60, 122233.89, 173999.96, 105783.66, 142…
## $ X2001.12.31 <dbl> 240127.2, 113162.3, 123266.1, 175446.2, 106256.8, 143577.4…
## $ X2002.01.31 <dbl> 241597.59, 113192.85, 124198.00, 176874.15, 106727.58, 144…
## $ X2002.02.28 <dbl> 243100.85, 113277.65, 125060.05, 178170.49, 107193.83, 144…
## $ X2002.03.31 <dbl> 244860.38, 113490.15, 125902.88, 179269.91, 107679.96, 145…
## $ X2002.04.30 <dbl> 247234.6, 113863.8, 126851.2, 180353.7, 108214.9, 146308.6…
## $ X2002.05.31 <dbl> 250006.8, 114304.1, 127891.7, 181509.0, 108799.5, 147090.0…
## $ X2002.06.30 <dbl> 253082.53, 114797.34, 128979.66, 182850.51, 109443.05, 147…
## $ X2002.07.31 <dbl> 256640.83, 115298.72, 130087.27, 184291.52, 110163.33, 148…
## $ X2002.08.31 <dbl> 260471.23, 115828.55, 131270.97, 185900.57, 110976.26, 149…
## $ X2002.09.30 <dbl> 264341.07, 116295.52, 132442.05, 187558.42, 111819.87, 151…
## $ X2002.10.31 <dbl> 268013.44, 116696.25, 133611.59, 189211.30, 112646.35, 152…
## $ X2002.11.30 <dbl> 271611.37, 117037.32, 134722.64, 190817.39, 113357.31, 153…
## $ X2002.12.31 <dbl> 275159.29, 117327.93, 135926.35, 192480.51, 114033.66, 154…
## $ X2003.01.31 <dbl> 278294.28, 117550.76, 137112.98, 194104.98, 114681.53, 155…
## $ X2003.02.28 <dbl> 281217.07, 117697.84, 138332.42, 195685.41, 115333.20, 155…
## $ X2003.03.31 <dbl> 284313.05, 117864.08, 139582.90, 197371.98, 115956.08, 156…
## $ X2003.04.30 <dbl> 287911.69, 118059.09, 140976.11, 199033.73, 116614.62, 157…
## $ X2003.05.31 <dbl> 291908.22, 118280.33, 142447.73, 200734.29, 117420.70, 158…
## $ X2003.06.30 <dbl> 295604.89, 118503.29, 143907.90, 202420.79, 118394.31, 159…
## $ X2003.07.31 <dbl> 299507.68, 118723.14, 145353.52, 204190.00, 119463.63, 160…
## $ X2003.08.31 <dbl> 303618.14, 118978.73, 146831.61, 206198.58, 120494.95, 161…
## $ X2003.09.30 <dbl> 308184.36, 119210.56, 148366.05, 208373.28, 121506.90, 162…
## $ X2003.10.31 <dbl> 312632.18, 119446.64, 149920.62, 210611.37, 122463.50, 163…
## $ X2003.11.30 <dbl> 316918.02, 119617.06, 151445.62, 212568.53, 123330.40, 165…
## $ X2003.12.31 <dbl> 321062.46, 119761.34, 152904.71, 214323.88, 124102.55, 165…
## $ X2004.01.31 <dbl> 325352.95, 120122.35, 154303.55, 216032.57, 124787.33, 166…
## $ X2004.02.29 <dbl> 330185.30, 120934.99, 155688.80, 217651.87, 125576.10, 167…
## $ X2004.03.31 <dbl> 335697.13, 122221.40, 157206.49, 219009.19, 126368.80, 168…
## $ X2004.04.30 <dbl> 342024.59, 123497.99, 158937.06, 220358.10, 127249.37, 169…
## $ X2004.05.31 <dbl> 349310.12, 124552.91, 160963.08, 221945.75, 128285.77, 170…
## $ X2004.06.30 <dbl> 357440.89, 125229.99, 163261.31, 223904.54, 129509.15, 171…
## $ X2004.07.31 <dbl> 365753.52, 125862.90, 165814.52, 226107.37, 130945.32, 173…
## $ X2004.08.31 <dbl> 373686.71, 126438.60, 168626.25, 228373.52, 132348.59, 174…
## $ X2004.09.30 <dbl> 381199.71, 127088.64, 171481.39, 230759.44, 133749.88, 175…
## $ X2004.10.31 <dbl> 388415.35, 127693.44, 174224.24, 233103.98, 134970.30, 177…
## $ X2004.11.30 <dbl> 394759.12, 128234.50, 176796.84, 235479.60, 136205.00, 178…
## $ X2004.12.31 <dbl> 400888.10, 128707.36, 179418.31, 237750.05, 137410.89, 179…
## $ X2005.01.31 <dbl> 406342.52, 129210.98, 182110.16, 239914.42, 138653.19, 180…
## $ X2005.02.28 <dbl> 412254.3, 129634.7, 184988.7, 242023.1, 139661.0, 182014.5…
## $ X2005.03.31 <dbl> 418468.5, 130012.2, 188216.1, 243963.3, 140626.0, 183146.1…
## $ X2005.04.30 <dbl> 425475.5, 130345.0, 192020.0, 246010.9, 141675.9, 184389.1…
## $ X2005.05.31 <dbl> 432291.5, 130707.0, 196264.5, 248185.3, 142939.7, 185658.5…
## $ X2005.06.30 <dbl> 438594.3, 131102.6, 200865.7, 250432.0, 144401.8, 186938.1…
## $ X2005.07.31 <dbl> 444624.6, 131452.7, 205776.0, 252796.2, 145966.2, 188164.6…
## $ X2005.08.31 <dbl> 450741.6, 131780.3, 210851.0, 255267.8, 147605.5, 189516.8…
## $ X2005.09.30 <dbl> 456886.6, 132108.4, 215825.1, 257862.7, 149145.9, 191067.5…
## $ X2005.10.31 <dbl> 462918.3, 132374.2, 220406.2, 260503.3, 150550.5, 192755.3…
## $ X2005.11.30 <dbl> 467811.4, 132684.9, 224641.6, 262890.4, 151839.2, 194098.9…
## $ X2005.12.31 <dbl> 472142.6, 132938.0, 228610.3, 265269.0, 153048.8, 195244.2…
## $ X2006.01.31 <dbl> 475751.3, 133145.6, 232278.6, 267348.2, 154122.4, 196246.0…
## $ X2006.02.28 <dbl> 478960.9, 133355.1, 235682.0, 269250.1, 155058.2, 197342.6…
## $ X2006.03.31 <dbl> 482018.1, 133609.6, 239218.8, 270820.3, 155936.5, 198463.8…
## $ X2006.04.30 <dbl> 484762.9, 134017.8, 243156.6, 272184.2, 156865.9, 199469.0…
## $ X2006.05.31 <dbl> 487490.5, 134576.0, 246932.9, 273472.5, 157905.9, 200486.7…
## $ X2006.06.30 <dbl> 489377.0, 135194.9, 250094.5, 274803.1, 159044.8, 201223.0…
## $ X2006.07.31 <dbl> 490077.5, 135812.0, 252264.1, 276048.5, 160099.1, 201783.9…
## $ X2006.08.31 <dbl> 490083.7, 136360.2, 253688.3, 277162.8, 160952.1, 202202.8…
## $ X2006.09.30 <dbl> 489033.5, 136943.1, 254285.0, 277882.6, 161546.8, 202538.9…
## $ X2006.10.31 <dbl> 487836.7, 137663.2, 254258.0, 278411.4, 162009.8, 202805.7…
## $ X2006.11.30 <dbl> 486560.6, 138352.9, 253877.0, 278717.3, 162379.8, 203043.3…
## $ X2006.12.31 <dbl> 485453.4, 139037.0, 253114.8, 279042.3, 162692.0, 203364.6…
## $ X2007.01.31 <dbl> 484539.6, 139398.1, 252150.5, 279313.5, 163007.9, 203859.2…
## $ X2007.02.28 <dbl> 483467.6, 139540.3, 251108.2, 279534.5, 163385.4, 204355.1…
## $ X2007.03.31 <dbl> 481934.0, 139522.5, 249910.6, 279827.5, 163859.0, 204751.4…
## $ X2007.04.30 <dbl> 479967.9, 139738.6, 248527.6, 279947.3, 164296.2, 204936.2…
## $ X2007.05.31 <dbl> 476901.3, 140133.9, 246726.7, 280177.0, 164716.6, 204956.4…
## $ X2007.06.30 <dbl> 473369.6, 140647.9, 244475.9, 280212.1, 165036.5, 204837.6…
## $ X2007.07.31 <dbl> 468849.7, 141106.6, 241764.5, 280268.1, 165381.4, 204567.9…
## $ X2007.08.31 <dbl> 464136.5, 141594.1, 238767.8, 280345.6, 165629.0, 204225.3…
## $ X2007.09.30 <dbl> 458839.5, 142074.5, 235699.9, 280351.5, 165753.5, 203680.7…
## $ X2007.10.31 <dbl> 453280.8, 142516.9, 232562.1, 280555.0, 165835.1, 203074.3…
## $ X2007.11.30 <dbl> 447693.1, 142885.5, 229281.1, 280759.9, 165939.4, 202442.8…
## $ X2007.12.31 <dbl> 441703.2, 143225.5, 225953.7, 280871.1, 166021.4, 201693.6…
## $ X2008.01.31 <dbl> 435738.7, 143088.1, 222338.7, 280939.9, 165978.7, 200876.0…
## $ X2008.02.29 <dbl> 428698.7, 142690.7, 218770.8, 280802.5, 165948.6, 199267.0…
## $ X2008.03.31 <dbl> 420288.9, 142105.5, 214960.3, 280645.7, 165883.4, 198512.8…
## $ X2008.04.30 <dbl> 410431.0, 141798.4, 211249.8, 280288.8, 165838.0, 197106.3…
## $ X2008.05.31 <dbl> 399937.8, 141578.4, 207263.4, 279692.0, 165637.4, 196611.5…
## $ X2008.06.30 <dbl> 389747.7, 141381.1, 203386.7, 278877.4, 165317.3, 194983.0…
## $ X2008.07.31 <dbl> 379427.2, 141217.9, 199422.9, 277797.9, 164748.0, 193925.9…
## $ X2008.08.31 <dbl> 369491.6, 140870.1, 195301.9, 276630.4, 164007.0, 191945.2…
## $ X2008.09.30 <dbl> 360312.3, 140367.0, 191110.8, 275325.4, 163225.1, 190261.0…
## $ X2008.10.31 <dbl> 352374.5, 139768.5, 186871.7, 273757.2, 162546.4, 187975.5…
## $ X2008.11.30 <dbl> 344679.0, 139181.3, 182703.6, 272040.3, 161907.6, 185834.3…
## $ X2008.12.31 <dbl> 337295.4, 138638.3, 178718.4, 270394.1, 161219.0, 183319.7…
## $ X2009.01.31 <dbl> 330393.3, 138144.3, 174970.2, 268871.3, 160516.6, 180608.5…
## $ X2009.02.28 <dbl> 325367.0, 137884.9, 171639.7, 267561.0, 160055.1, 178448.0…
## $ X2009.03.31 <dbl> 321204.0, 137778.4, 168508.7, 266399.4, 159827.2, 176341.8…
## $ X2009.04.30 <dbl> 317292.2, 137762.0, 165391.1, 265410.2, 159689.8, 175010.0…
## $ X2009.05.31 <dbl> 313501.5, 137761.8, 162375.1, 264501.0, 159451.0, 173431.5…
## $ X2009.06.30 <dbl> 310188.7, 137737.9, 159506.1, 263513.6, 159123.4, 172172.0…
## $ X2009.07.31 <dbl> 307731.9, 137748.7, 157051.3, 262490.3, 158765.0, 170935.7…
## $ X2009.08.31 <dbl> 305678.6, 137633.4, 154789.6, 261385.7, 158343.5, 169815.6…
## $ X2009.09.30 <dbl> 304070.2, 137481.2, 152718.9, 260455.3, 157945.2, 168286.6…
## $ X2009.10.31 <dbl> 303212.8, 137279.2, 150967.0, 259735.4, 157673.7, 166798.1…
## $ X2009.11.30 <dbl> 304057.5, 137321.6, 149799.0, 259275.3, 157618.6, 165343.5…
## $ X2009.12.31 <dbl> 305990.8, 137445.5, 148911.3, 258966.7, 157704.1, 164600.2…
## $ X2010.01.31 <dbl> 307954.36, 137505.72, 148227.96, 258741.54, 157947.67, 163…
## $ X2010.02.28 <dbl> 308774.67, 137492.24, 147632.90, 258723.21, 158287.22, 163…
## $ X2010.03.31 <dbl> 309822.77, 137517.05, 147227.55, 258801.00, 158759.16, 162…
## $ X2010.04.30 <dbl> 311128.19, 137690.46, 146873.64, 259191.12, 159217.80, 162…
## $ X2010.05.31 <dbl> 312565.53, 137890.72, 146414.76, 259608.33, 159518.72, 162…
## $ X2010.06.30 <dbl> 312499.69, 137936.00, 145722.79, 260166.34, 159571.76, 162…
## $ X2010.07.31 <dbl> 311346.90, 137691.54, 144763.80, 260523.49, 159328.61, 161…
## $ X2010.08.31 <dbl> 309377.75, 137219.39, 143566.24, 260437.15, 158866.71, 160…
## $ X2010.09.30 <dbl> 307368.59, 136660.12, 142258.93, 259897.09, 158178.67, 158…
## $ X2010.10.31 <dbl> 305213.04, 136097.40, 140891.75, 258921.09, 157366.24, 157…
## $ X2010.11.30 <dbl> 303303.70, 135513.63, 139507.23, 257899.82, 156472.95, 155…
## $ X2010.12.31 <dbl> 301943.75, 135006.55, 138187.50, 256864.99, 155629.91, 154…
## $ X2011.01.31 <dbl> 300884.79, 134565.93, 136868.70, 256134.99, 154897.02, 152…
## $ X2011.02.28 <dbl> 299870.25, 134102.34, 135618.16, 255574.97, 154303.17, 151…
## $ X2011.03.31 <dbl> 298375.69, 133646.77, 134340.95, 255230.39, 153850.45, 150…
## $ X2011.04.30 <dbl> 296784.89, 133205.94, 133105.25, 254767.25, 153366.58, 149…
## $ X2011.05.31 <dbl> 294394.87, 132856.83, 131871.11, 254270.70, 152806.97, 148…
## $ X2011.06.30 <dbl> 292471.27, 132501.02, 130755.53, 253530.70, 152092.52, 147…
## $ X2011.07.31 <dbl> 290796.22, 132303.55, 129727.48, 252806.75, 151379.14, 146…
## $ X2011.08.31 <dbl> 288969.17, 132187.39, 128918.63, 252099.75, 150786.77, 145…
## $ X2011.09.30 <dbl> 286788.2, 132145.8, 128310.6, 251454.4, 150361.8, 145095.9…
## $ X2011.10.31 <dbl> 284459.72, 132034.37, 127962.04, 250811.88, 150013.15, 143…
## $ X2011.11.30 <dbl> 283309.96, 131826.06, 127783.86, 250112.42, 149664.21, 142…
## $ X2011.12.31 <dbl> 282560.62, 131582.74, 127702.65, 249520.94, 149324.32, 141…
## $ X2012.01.31 <dbl> 282174.35, 131471.32, 127730.85, 249000.51, 149028.89, 140…
## $ X2012.02.29 <dbl> 281612.98, 131624.41, 127850.87, 248793.90, 148963.62, 139…
## $ X2012.03.31 <dbl> 280982.04, 131897.30, 128042.87, 248982.27, 149278.18, 138…
## $ X2012.04.30 <dbl> 280551.19, 132171.80, 128400.70, 249375.38, 149742.98, 138…
## $ X2012.05.31 <dbl> 280747.24, 132478.21, 128863.97, 249865.72, 150186.67, 137…
## $ X2012.06.30 <dbl> 281856.82, 132909.96, 129491.70, 250225.12, 150365.78, 137…
## $ X2012.07.31 <dbl> 283465.90, 133399.29, 130121.35, 250558.77, 150440.25, 138…
## $ X2012.08.31 <dbl> 286010.20, 133764.67, 130681.24, 250570.15, 150319.22, 138…
## $ X2012.09.30 <dbl> 289446.23, 134077.93, 131276.86, 250597.05, 150199.69, 138…
## $ X2012.10.31 <dbl> 293831.14, 134390.14, 132015.33, 250436.28, 150024.31, 138…
## $ X2012.11.30 <dbl> 298632.6, 134750.5, 133061.1, 250421.2, 149895.4, 137892.9…
## $ X2012.12.31 <dbl> 303742.95, 135112.13, 134454.81, 250395.57, 149809.93, 137…
## $ X2013.01.31 <dbl> 308583.76, 135617.03, 136048.79, 250796.57, 149804.20, 137…
## $ X2013.02.28 <dbl> 312867.7, 136258.6, 137676.1, 251463.5, 149824.9, 138131.8…
## $ X2013.03.31 <dbl> 316547.55, 137019.04, 139336.64, 252248.87, 150052.34, 138…
## $ X2013.04.30 <dbl> 320879.4, 137740.9, 141047.0, 252990.5, 150477.9, 139959.0…
## $ X2013.05.31 <dbl> 326796.9, 138564.8, 142976.1, 253830.2, 151107.6, 140865.5…
## $ X2013.06.30 <dbl> 333957.4, 139548.0, 144866.4, 254996.8, 151638.9, 141977.2…
## $ X2013.07.31 <dbl> 341130.4, 140491.5, 146682.3, 256159.3, 151795.5, 142884.2…
## $ X2013.08.31 <dbl> 347991.2, 141406.2, 148416.0, 257203.3, 151850.2, 144077.0…
## $ X2013.09.30 <dbl> 354714.7, 142120.2, 150100.8, 257958.1, 151781.6, 145162.1…
## $ X2013.10.31 <dbl> 361355.1, 142934.3, 151879.7, 258759.3, 151994.2, 146547.9…
## $ X2013.11.30 <dbl> 366925.2, 143679.9, 153450.8, 259423.2, 152146.8, 147746.2…
## $ X2013.12.31 <dbl> 371412.0, 144516.5, 155053.8, 259871.6, 152354.3, 148877.7…
## $ X2014.01.31 <dbl> 374659.4, 145498.9, 156579.6, 260256.9, 152491.2, 149701.2…
## $ X2014.02.28 <dbl> 377344.3, 146607.9, 158223.9, 260784.8, 152587.1, 150591.5…
## $ X2014.03.31 <dbl> 379786.0, 147726.7, 159766.0, 261754.0, 152682.6, 151366.3…
## $ X2014.04.30 <dbl> 382763.7, 148699.3, 161139.3, 262882.2, 152921.7, 152157.7…
## $ X2014.05.31 <dbl> 385941.4, 149684.4, 162493.7, 264223.4, 153358.6, 152974.6…
## $ X2014.06.30 <dbl> 388558.2, 150684.6, 163616.2, 265148.5, 153820.4, 153847.5…
## $ X2014.07.31 <dbl> 390278.8, 151823.6, 164772.6, 265909.9, 154300.8, 154814.5…
## $ X2014.08.31 <dbl> 391787.3, 152847.9, 165756.7, 266362.6, 154537.1, 155612.9…
## $ X2014.09.30 <dbl> 394277.9, 153751.0, 166779.5, 266970.1, 154559.4, 156197.3…
## $ X2014.10.31 <dbl> 397437.2, 154391.3, 167637.9, 267441.2, 154235.8, 156546.9…
## $ X2014.11.30 <dbl> 400977.8, 155200.4, 168710.6, 268088.3, 154033.2, 156877.8…
## $ X2014.12.31 <dbl> 404247.0, 156147.2, 169982.6, 268640.6, 154015.0, 157236.4…
## $ X2015.01.31 <dbl> 407153.5, 157331.9, 171371.4, 269054.2, 154092.7, 157461.6…
## $ X2015.02.28 <dbl> 409610.0, 158559.7, 172588.5, 269298.1, 154143.6, 157481.4…
## $ X2015.03.31 <dbl> 411620.7, 159760.0, 173663.2, 269545.7, 154330.6, 157710.7…
## $ X2015.04.30 <dbl> 413566.8, 160883.3, 174742.5, 269995.1, 154741.3, 158097.1…
## $ X2015.05.31 <dbl> 415767.3, 161958.0, 175965.1, 270802.1, 155321.3, 158831.8…
## $ X2015.06.30 <dbl> 418022.3, 163028.5, 177218.6, 271686.6, 155838.7, 159434.4…
## $ X2015.07.31 <dbl> 419799.2, 164141.5, 178647.9, 272787.7, 156454.6, 160078.9…
## $ X2015.08.31 <dbl> 420990.1, 165302.7, 180161.0, 274108.4, 157162.3, 160654.2…
## $ X2015.09.30 <dbl> 422008.7, 166486.7, 181807.0, 275855.2, 157855.2, 161322.0…
## $ X2015.10.31 <dbl> 423264.3, 167679.7, 183526.9, 277744.4, 158378.1, 162251.2…
## $ X2015.11.30 <dbl> 424312.2, 168730.3, 185185.4, 279288.1, 158642.5, 163135.8…
## $ X2015.12.31 <dbl> 425268.1, 169819.9, 186725.1, 280539.3, 158994.0, 163994.3…
## $ X2016.01.31 <dbl> 428434.8, 170986.7, 188144.4, 281424.5, 159472.1, 164819.1…
## $ X2016.02.29 <dbl> 433762.0, 172203.9, 189489.8, 282116.1, 159962.0, 165708.6…
## $ X2016.03.31 <dbl> 439813.5, 173101.4, 190726.5, 282429.2, 160002.4, 166322.3…
## $ X2016.04.30 <dbl> 444385.8, 173955.4, 192036.4, 282988.6, 159995.3, 166888.0…
## $ X2016.05.31 <dbl> 447418.8, 174779.6, 193332.7, 283846.9, 160188.8, 167440.6…
## $ X2016.06.30 <dbl> 450099.0, 175761.9, 194718.5, 285177.9, 160682.1, 168117.6…
## $ X2016.07.31 <dbl> 451079.2, 176580.9, 195934.9, 286272.2, 161067.6, 168423.3…
## $ X2016.08.31 <dbl> 450770.7, 177377.1, 197195.2, 287055.3, 161454.4, 168620.5…
## $ X2016.09.30 <dbl> 449289.3, 178287.5, 198611.0, 287657.9, 161942.4, 168788.4…
## $ X2016.10.31 <dbl> 448952.6, 179291.5, 200186.0, 288497.0, 162563.4, 169309.2…
## $ X2016.11.30 <dbl> 450021.3, 180421.1, 201627.6, 289793.4, 163131.8, 169963.7…
## $ X2016.12.31 <dbl> 452600.8, 181506.5, 202823.3, 291583.6, 163773.1, 170889.6…
## $ X2017.01.31 <dbl> 457480.7, 182574.4, 203870.9, 293224.8, 164334.1, 171832.4…
## $ X2017.02.28 <dbl> 463902.4, 183527.9, 205043.4, 294957.3, 164831.3, 172864.3…
## $ X2017.03.31 <dbl> 470876.7, 184544.6, 206412.2, 296486.4, 165211.9, 173967.1…
## $ X2017.04.30 <dbl> 476572.4, 185670.1, 207866.5, 298546.8, 165654.0, 174841.8…
## $ X2017.05.31 <dbl> 480748.5, 186683.6, 209140.2, 300206.3, 166117.8, 175482.0…
## $ X2017.06.30 <dbl> 484327.4, 187597.0, 210453.8, 301663.4, 166604.3, 175675.4…
## $ X2017.07.31 <dbl> 486069.8, 188304.0, 211701.8, 302738.2, 167114.3, 175821.5…
## $ X2017.08.31 <dbl> 487575.8, 189053.5, 213110.8, 304068.7, 167673.5, 176009.6…
## $ X2017.09.30 <dbl> 489161.5, 189814.5, 214475.5, 305419.4, 168298.7, 176609.4…
## $ X2017.10.31 <dbl> 492609.7, 190722.2, 215939.0, 307107.3, 169058.9, 177434.1…
## $ X2017.11.30 <dbl> 496433.5, 191721.7, 217445.1, 308476.2, 169960.7, 178458.0…
## $ X2017.12.31 <dbl> 500927.5, 192831.4, 219021.9, 310004.2, 170865.1, 179479.8…
## $ X2018.01.31 <dbl> 506002.1, 193931.8, 220424.3, 311384.4, 171673.4, 180567.6…
## $ X2018.02.28 <dbl> 511598.8, 194933.1, 221656.0, 312490.0, 172379.0, 181514.0…
## $ X2018.03.31 <dbl> 516700.6, 196106.0, 222895.0, 313930.3, 173355.9, 182641.8…
## $ X2018.04.30 <dbl> 520897.0, 197203.4, 224122.9, 315540.7, 174283.9, 183483.8…
## $ X2018.05.31 <dbl> 524736.3, 198347.8, 225396.4, 317883.7, 175021.0, 184137.8…
## $ X2018.06.30 <dbl> 527861.4, 199216.9, 226469.1, 319998.1, 175385.1, 184095.9…
## $ X2018.07.31 <dbl> 530704.6, 200210.4, 227859.7, 322385.7, 175765.3, 184151.3…
## $ X2018.08.31 <dbl> 533754.8, 201104.2, 229218.2, 324714.0, 176202.8, 184082.9…
## $ X2018.09.30 <dbl> 537196.2, 201986.0, 230597.7, 327102.6, 176621.1, 184223.3…
## $ X2018.10.31 <dbl> 540856.9, 202658.6, 231732.9, 328684.8, 176863.6, 184200.7…
## $ X2018.11.30 <dbl> 543445.5, 203421.3, 232938.4, 329810.2, 177136.7, 184354.7…
## $ X2018.12.31 <dbl> 544962.3, 204269.4, 234241.3, 330470.4, 177504.4, 184731.7…
## $ X2019.01.31 <dbl> 544489.2, 205109.6, 235417.0, 331544.2, 178158.2, 185526.7…
## $ X2019.02.28 <dbl> 542862.7, 205850.1, 236472.0, 333056.6, 179141.6, 186691.6…
## $ X2019.03.31 <dbl> 541019.8, 206490.8, 237185.4, 334696.2, 180176.0, 187787.2…
## $ X2019.04.30 <dbl> 539962.0, 207031.8, 237700.5, 336075.6, 181011.5, 188611.3…
## $ X2019.05.31 <dbl> 539328.8, 207518.0, 237977.4, 336863.7, 181560.4, 188978.9…
## $ X2019.06.30 <dbl> 538938.3, 207989.6, 238378.5, 337825.4, 182159.3, 189167.5…
## $ X2019.07.31 <dbl> 539348.7, 208490.1, 238897.1, 338806.0, 182834.7, 189065.1…
## $ X2019.08.31 <dbl> 541064.8, 209089.9, 239604.5, 339979.1, 183543.5, 188964.7…
## $ X2019.09.30 <dbl> 543688.0, 209745.3, 240307.6, 340620.4, 184186.9, 188706.5…
## $ X2019.10.31 <dbl> 546780.2, 210622.7, 241239.5, 341343.4, 184997.6, 188775.9…
## $ X2019.11.30 <dbl> 550147.0, 211661.6, 242514.0, 342787.6, 186029.6, 189094.9…
## $ X2019.12.31 <dbl> 553636.3, 212792.8, 244084.7, 344624.3, 187148.8, 189782.5…
## $ X2020.01.31 <dbl> 556486.2, 213941.2, 245987.8, 346888.5, 188253.8, 190751.2…
## $ X2020.02.29 <dbl> 558778.1, 215052.3, 247780.7, 348614.7, 189412.0, 191879.9…
## $ X2020.03.31 <dbl> 560986.2, 216135.8, 249355.9, 350203.5, 190654.9, 192983.7…
## $ X2020.04.30 <dbl> 563025.1, 216917.0, 250314.0, 351523.1, 192017.8, 193779.7…
## $ X2020.05.31 <dbl> 563282.7, 217195.9, 250578.2, 352671.2, 192844.6, 193882.8…
## $ X2020.06.30 <dbl> 562062.7, 217362.9, 250710.7, 353291.8, 193186.4, 193586.3…
## $ X2020.07.31 <dbl> 561905.2, 218015.5, 251276.7, 353467.6, 193483.7, 193309.7…
## $ X2020.08.31 <dbl> 565186.1, 219433.1, 252836.7, 353870.7, 194478.8, 193796.0…
## $ X2020.09.30 <dbl> 572469.7, 221573.2, 255338.8, 355902.3, 196595.7, 195343.0…
## $ X2020.10.31 <dbl> 581822.3, 223988.6, 258299.5, 359232.3, 199254.8, 197438.0…
## $ X2020.11.30 <dbl> 592712.9, 226743.2, 261775.7, 363633.2, 202436.3, 199914.0…
## $ X2020.12.31 <dbl> 603576.0, 229412.0, 265230.7, 368487.6, 205431.3, 202233.8…
## $ X2021.01.31 <dbl> 612596.5, 232213.7, 268806.7, 373082.8, 208188.7, 204531.8…
## $ X2021.02.28 <dbl> 620314.3, 235198.1, 272369.3, 377878.0, 210880.3, 207040.3…
## $ X2021.03.31 <dbl> 627306.0, 238577.0, 276176.2, 382358.6, 213682.0, 209598.3…
## $ X2021.04.30 <dbl> 636459.0, 242326.8, 280409.8, 386521.6, 216555.3, 212008.4…
## $ X2021.05.31 <dbl> 647419.3, 246618.0, 285533.0, 390683.3, 219441.2, 214175.5…
## $ X2021.06.30 <dbl> 659186.9, 251209.4, 291805.2, 394581.2, 222466.7, 216434.1…
## $ X2021.07.31 <dbl> 669822.8, 255421.6, 298228.1, 398024.8, 224661.5, 218356.4…
## $ X2021.08.31 <dbl> 677181.2, 258632.5, 303942.3, 400124.3, 225513.9, 219476.3…
## $ X2021.09.30 <dbl> 682170.9, 260979.7, 308572.4, 401162.2, 224774.3, 219624.4…
## $ X2021.10.31 <dbl> 686625.5, 263414.6, 313131.1, 402661.6, 224184.2, 219781.2…
## $ X2021.11.30 <dbl> 692498.8, 266567.0, 318230.8, 404915.8, 224289.5, 220462.0…
## $ X2021.12.31 <dbl> 699739.4, 270190.7, 323743.4, 407580.3, 225236.9, 221851.0…
## $ X2022.01.31 <dbl> 709837.1, 274958.1, 330663.5, 410719.1, 227530.8, 224290.9…
## $ X2022.02.28 <dbl> 722794.8, 280290.1, 338712.4, 414524.3, 230895.8, 227666.3…
## $ X2022.03.31 <dbl> 738419.6, 286319.3, 348212.8, 419603.0, 235106.4, 231725.7…
## $ X2022.04.30 <dbl> 752628.5, 292169.1, 358203.3, 424799.4, 238922.8, 235521.7…
## $ X2022.05.31 <dbl> 763338.9, 297545.9, 367778.1, 429542.3, 241871.0, 238074.4…
## $ X2022.06.30 <dbl> 769033.5, 302239.0, 376288.9, 434080.3, 244173.0, 239808.1…
## $ X2022.07.31 <dbl> 769344.7, 305092.6, 382063.6, 437141.2, 245058.4, 240163.5…
## $ X2022.08.31 <dbl> 764650.1, 305988.4, 384986.6, 438058.1, 244851.7, 239698.0…
## $ X2022.09.30 <dbl> 757141.7, 305328.0, 385360.3, 436505.6, 243906.8, 238433.9…
## $ X2022.10.31 <dbl> 750382.4, 304383.7, 384882.7, 434695.1, 243612.8, 237587.6…
## $ X2022.11.30 <dbl> 745070.5, 303387.7, 384183.5, 433155.1, 243808.8, 237114.1…
## $ X2022.12.31 <dbl> 740517.2, 302107.9, 383346.3, 431588.4, 244013.9, 236856.7…
## $ X2023.01.31 <dbl> 733433.3, 300419.2, 382012.6, 429948.6, 243834.9, 236348.6…
## $ X2023.02.28 <dbl> 726150.5, 298813.3, 380883.4, 428586.3, 243450.5, 236127.7…
## $ X2023.03.31 <dbl> 720793.3, 297741.5, 380424.0, 428679.5, 243602.5, 236515.7…
## $ X2023.04.30 <dbl> 720203.2, 297371.6, 380864.7, 430156.3, 244405.4, 237842.6…
## $ X2023.05.31 <dbl> 723208.9, 297691.2, 381907.4, 432573.3, 245913.8, 239675.3…
## $ X2023.06.30 <dbl> 728472.3, 298520.4, 383565.7, 435414.0, 247702.2, 241772.7…
## $ X2023.07.31 <dbl> 735567.6, 299267.1, 385252.1, 438426.2, 249442.2, 243733.8…
## $ X2023.08.31 <dbl> 743596.1, 299846.6, 386923.3, 441487.5, 251109.6, 245662.5…
## $ X2023.09.30 <dbl> 751268.3, 299884.5, 388085.5, 444085.4, 252410.6, 247137.7…
## $ X2023.10.31 <dbl> 757506.6, 299654.4, 389138.5, 445927.0, 253291.4, 248324.4…
## $ X2023.11.30 <dbl> 761970.8, 299166.0, 390061.9, 447465.0, 253820.7, 249164.1…
## $ X2023.12.31 <dbl> 764813.1, 298562.8, 390885.9, 449149.1, 254254.4, 249790.3…
## $ X2024.01.31 <dbl> 765351.8, 298364.6, 391584.8, 450934.0, 254753.3, 250247.7…
## $ X2024.02.29 <dbl> 765196.9, 298623.9, 392305.6, 453138.4, 255570.2, 251267.1…
# Averaging columns horizontally to consolidate values by year 
years <- 2000:2023
for (year in years) {
  monthly_cols <- grep(paste0("X", year), names(Zillow), value = TRUE)
  Zillow[[paste0(year, "_Yearly_Avg")]] <- rowMeans(Zillow[monthly_cols], na.rm = TRUE)
}


# Zillow data formatted 
Zillow_new <- Zillow |>
              select(RegionName,"2000_Yearly_Avg":"2023_Yearly_Avg") |>
              rename('STATE'=RegionName) |>
              inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data 
              select(REGION,STATE,"2000_Yearly_Avg":"2023_Yearly_Avg")
glimpse(Zillow_new)
## Rows: 51
## Columns: 26
## $ REGION            <int> 4, 3, 3, 1, 1, 2, 2, 3, 3, 2, 1, 3, 4, 4, 1, 3, 2, 3…
## $ STATE             <chr> "California", "Texas", "Florida", "New York", "Penns…
## $ `2000_Yearly_Avg` <dbl> 200192.97, 111081.16, 108765.99, 155593.02, 98251.04…
## $ `2001_Yearly_Avg` <dbl> 228074.6, 112622.0, 117723.1, 168553.1, 103576.3, 13…
## $ `2002_Yearly_Avg` <dbl> 256343.33, 115117.52, 129745.37, 184107.29, 110087.9…
## $ `2003_Yearly_Avg` <dbl> 298431.00, 118641.07, 144765.18, 203801.40, 119146.8…
## $ `2004_Yearly_Avg` <dbl> 362059.5, 125048.8, 165560.2, 225873.0, 130617.2, 17…
## $ `2005_Yearly_Avg` <dbl> 440712.6, 131195.9, 204214.6, 252093.2, 145509.5, 18…
## $ `2006_Yearly_Avg` <dbl> 485617.2, 135672.3, 247404.2, 274595.3, 159051.1, 20…
## $ `2007_Yearly_Avg` <dbl> 467890.2, 141032.0, 241410.7, 280180.3, 164905.1, 20…
## $ `2008_Yearly_Avg` <dbl> 385701.9, 141057.1, 201008.2, 277266.0, 164354.7, 19…
## $ `2009_Yearly_Avg` <dbl> 312390.7, 137664.9, 159719.0, 263213.8, 158892.8, 17…
## $ `2010_Yearly_Avg` <dbl> 308441.58, 137018.40, 144272.92, 259147.93, 158262.1…
## $ `2011_Yearly_Avg` <dbl> 291638.81, 132746.56, 131080.41, 253026.26, 151903.8…
## $ `2012_Yearly_Avg` <dbl> 286921.13, 133170.64, 130165.97, 249935.16, 149854.5…
## $ `2013_Yearly_Avg` <dbl> 338596.8, 139991.4, 145627.9, 255475.1, 151235.7, 14…
## $ `2014_Yearly_Avg` <dbl> 389004.9, 151088.6, 163788.2, 264871.9, 153628.6, 15…
## $ `2015_Yearly_Avg` <dbl> 417615.3, 163640.2, 178466.9, 273392.1, 156329.6, 16…
## $ `2016_Yearly_Avg` <dbl> 445552.3, 176187.8, 195402.2, 285736.9, 161186.3, 16…
## $ `2017_Yearly_Avg` <dbl> 482223.8, 187753.7, 211206.8, 301908.2, 167143.7, 17…
## $ `2018_Yearly_Avg` <dbl> 528226.4, 199449.1, 227296.0, 321199.6, 175182.7, 18…
## $ `2019_Yearly_Avg` <dbl> 543438.8, 208532.7, 239148.2, 338185.2, 182579.0, 18…
## $ `2020_Yearly_Avg` <dbl> 570191.1, 219647.6, 253290.4, 354815.5, 194837.5, 19…
## $ `2021_Yearly_Avg` <dbl> 659276.7, 251779.1, 295079.0, 393297.9, 219989.5, 21…
## $ `2022_Yearly_Avg` <dbl> 748596.6, 296650.8, 368723.5, 428701.0, 240312.7, 23…
## $ `2023_Yearly_Avg` <dbl> 738915.4, 298911.6, 385000.4, 437658.2, 248603.2, 24…
# Zillow Average Home Prices by Region 
allRegions_Zillow <- Zillow_new |>
                     select(-STATE) |>
                     group_by(REGION) |>
                     summarise_all(mean) |>
                     pivot_longer(cols = "2000_Yearly_Avg":"2023_Yearly_Avg",
                     names_to = c("Year_Column"),
                     values_to = "ZillowAvgHomePrice")
allRegions_Zillow$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", allRegions_Zillow$Year_Column)
allRegions_Zillow <- allRegions_Zillow |> select(-Year_Column)
glimpse(allRegions_Zillow)
## Rows: 96
## Columns: 3
## $ REGION             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year               <chr> "2000", "2001", "2002", "2003", "2004", "2005", "20…
# Convert to CSV
#write.csv(allRegions_Zillow, 'allRegions_Zillow.csv', row.names=FALSE) 

Tidying Regional Data from The U.S. Bureau of Economic Analysis (BEA)

# Imported BEA Data
BEA_unclean <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /RegionalData.csv', show_col_types = FALSE)
glimpse(BEA_unclean)
## Rows: 1,071
## Columns: 27
## $ GeoFips     <dbl> 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000…
## $ GeoName     <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Al…
## $ LineCode    <dbl> NA, 1, 2, 3, NA, 4, 5, 6, 7, NA, 8, 9, NA, 10, 11, 12, NA,…
## $ Description <chr> "Real dollar statistics", "Real GDP (millions of chained 2…
## $ `2000`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "108002.9", "95839…
## $ `2001`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "111894.1", "99375…
## $ `2002`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "115007.7", "10390…
## $ `2003`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "120241", "109568.…
## $ `2004`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "128606.3", "11762…
## $ `2005`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "136087.5", "12320…
## $ `2006`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "144719", "130008"…
## $ `2007`      <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "151942.6", "13620…
## $ `2008`      <chr> NA, "(NA)", "199308.2", "164093.5", NA, "(NA)", "157780.3"…
## $ `2009`      <chr> NA, "(NA)", "199810.9", "164430.3", NA, "(NA)", "155666.9"…
## $ `2010`      <chr> NA, "(NA)", "199934.3", "163000.8", NA, "(NA)", "162068.8"…
## $ `2011`      <chr> NA, "(NA)", "201384.9", "163835.5", NA, "(NA)", "167882.2"…
## $ `2012`      <chr> NA, "(NA)", "201397.8", "163555", NA, "(NA)", "172101.5", …
## $ `2013`      <chr> NA, "(NA)", "201172.1", "166493.3", NA, "(NA)", "173720.7"…
## $ `2014`      <chr> NA, "(NA)", "205291.8", "169501.8", NA, "(NA)", "179487.1"…
## $ `2015`      <chr> NA, "(NA)", "215147.9", "175050.6", NA, "(NA)", "187474.7"…
## $ `2016`      <chr> NA, "(NA)", "214859.9", "177257.9", NA, "(NA)", "190871.8"…
## $ `2017`      <dbl> NA, 216615.500, 220235.900, 181495.600, NA, 216615.500, 19…
## $ `2018`      <dbl> NA, 220808.800, 231109.900, 190026.500, NA, 226263.800, 20…
## $ `2019`      <dbl> NA, 224944.600, 235777.400, 193422.300, NA, 234526.400, 21…
## $ `2020`      <dbl> NA, 222081.400, 252611.700, 191489.300, NA, 235118.300, 23…
## $ `2021`      <dbl> NA, 231892.600, 264054.000, 206172.000, NA, 257986.500, 25…
## $ `2022`      <dbl> NA, 235807.300, 253984.200, 211182.900, NA, 281569.000, 25…
# Disposable Income data extracted from BEA 
Disposable_Income <- BEA_unclean |> 
                     filter(LineCode == 6) |> 
                     select(-c('Description', 'LineCode', 'GeoFips')) |> 
                     mutate(across('2000':'2022', as.numeric)) |>
                     pivot_longer(c('2000':'2022'),
                        names_to = "Year", 
                        values_to = "Average Disposable Income", 
                        values_drop_na = FALSE) |>
                     pivot_wider(names_from = "Year", values_from = "Average Disposable Income")
colnames(Disposable_Income)[colnames(Disposable_Income) == "GeoName"] <- "STATE"
glimpse(Disposable_Income)
## Rows: 51
## Columns: 24
## $ STATE  <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
## $ `2000` <dbl> 95839.8, 17933.0, 118337.5, 54020.7, 932100.4, 124352.3, 118615…
## $ `2001` <dbl> 99375.0, 18968.7, 124820.6, 57065.8, 988772.1, 132523.8, 125846…
## $ `2002` <dbl> 103906.5, 20293.0, 132597.0, 59393.7, 1040694.4, 136160.2, 1312…
## $ `2003` <dbl> 109568.8, 21450.3, 141828.8, 63652.8, 1098063.6, 140456.1, 1343…
## $ `2004` <dbl> 117627.3, 22543.5, 154311.8, 68042.8, 1164414.7, 145983.9, 1410…
## $ `2005` <dbl> 123206.1, 23868.0, 167738.1, 70833.0, 1209975.6, 153717.8, 1460…
## $ `2006` <dbl> 130008.0, 25276.3, 184940.1, 75056.5, 1295546.9, 164602.5, 1569…
## $ `2007` <dbl> 136204.2, 26972.1, 195625.4, 79532.9, 1346007.1, 174261.5, 1668…
## $ `2008` <dbl> 141827.1, 29693.1, 201198.3, 83120.6, 1376342.8, 183754.7, 1806…
## $ `2009` <dbl> 143231.0, 30530.9, 195955.8, 83435.7, 1378288.7, 180175.7, 1864…
## $ `2010` <dbl> 148862.1, 32703.9, 198815.2, 86387.0, 1431825.6, 184323.2, 1919…
## $ `2011` <dbl> 152835.4, 34789.2, 206128.2, 91406.0, 1504645.4, 197828.9, 1943…
## $ `2012` <dbl> 157273.8, 35945.3, 214865.8, 97557.8, 1590509.7, 208823.5, 1963…
## $ `2013` <dbl> 157600.0, 35184.4, 218160.9, 96883.6, 1587112.7, 217662.1, 1871…
## $ `2014` <dbl> 162978.1, 37075.5, 228628.0, 102362.7, 1679406.9, 235238.8, 195…
## $ `2015` <dbl> 169520.5, 38075.3, 239543.4, 105216.0, 1786764.4, 243622.7, 199…
## $ `2016` <dbl> 172173.6, 37704.1, 250636.5, 107887.7, 1871763.1, 246133.3, 205…
## $ `2017` <dbl> 179134.7, 38525.4, 266666.0, 111671.5, 1961488.7, 264253.1, 210…
## $ `2018` <dbl> 187334.2, 40309.8, 283699.5, 117322.8, 2057748.6, 288001.5, 221…
## $ `2019` <dbl> 195208.0, 41045.3, 302159.1, 119309.6, 2169277.3, 307696.4, 227…
## $ `2020` <dbl> 210513.8, 42025.7, 337272.5, 129589.1, 2368785.6, 330301.7, 236…
## $ `2021` <dbl> 228206.2, 44143.4, 364326.7, 141244.9, 2520230.0, 363127.6, 242…
## $ `2022` <dbl> 229599.5, 45685.6, 377143.3, 142906.9, 2464043.4, 378514.4, 244…
# Consumer Spending data extracted from BEA
Consumer_Spending <- BEA_unclean |> 
                     filter(LineCode == 7) |> 
                     select(-c('Description', 'LineCode', 'GeoFips')) |> 
                     mutate(across('2000':'2022', as.numeric)) |>
                     pivot_longer(c('2000':'2022'),
                        names_to = "Year", 
                        values_to = "Personal Consumption Expenditures", 
                        values_drop_na = FALSE) |>
                     pivot_wider(names_from = "Year", values_from = "Personal Consumption Expenditures")
colnames(Consumer_Spending)[colnames(Consumer_Spending) == "GeoName"] <- "STATE"
glimpse(Consumer_Spending)
## Rows: 51
## Columns: 24
## $ STATE  <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
## $ `2000` <dbl> 89748.7, 17107.7, 121418.9, 50566.1, 844374.3, 117037.5, 100901…
## $ `2001` <dbl> 93232.7, 18123.5, 127277.9, 52796.3, 886833.1, 123305.4, 105264…
## $ `2002` <dbl> 97220.1, 19432.2, 133667.9, 54843.4, 924960.4, 126789.8, 109481…
## $ `2003` <dbl> 102106.6, 20557.2, 142860.7, 57788.1, 975445.5, 132387.1, 11506…
## $ `2004` <dbl> 108428.7, 21847.5, 155367.0, 61297.4, 1041764.2, 140464.2, 1220…
## $ `2005` <dbl> 115631.1, 23354.8, 170011.9, 65560.6, 1114921.0, 148825.1, 1294…
## $ `2006` <dbl> 121487.4, 24275.1, 184063.5, 69198.3, 1186034.5, 158054.6, 1361…
## $ `2007` <dbl> 126916.4, 25617.0, 194363.4, 72561.3, 1256410.5, 167441.9, 1417…
## $ `2008` <dbl> 130009.5, 26749.3, 197557.7, 75160.6, 1295036.3, 173274.1, 1459…
## $ `2009` <dbl> 128265.5, 27166.7, 190868.2, 74484.9, 1253678.5, 170721.1, 1429…
## $ `2010` <dbl> 132314.4, 28537.5, 196474.0, 77458.3, 1297976.1, 175992.9, 1467…
## $ `2011` <dbl> 136772.4, 29947.6, 203580.7, 80992.6, 1354341.6, 184841.9, 1509…
## $ `2012` <dbl> 139950.6, 31039.1, 209936.3, 83237.7, 1409173.4, 189763.8, 1548…
## $ `2013` <dbl> 143956.7, 31673.2, 216830.0, 85154.6, 1453927.1, 198951.0, 1585…
## $ `2014` <dbl> 148385.6, 32641.4, 226151.5, 88584.1, 1522216.3, 210765.5, 1644…
## $ `2015` <dbl> 152790.5, 33274.4, 234755.7, 91366.0, 1595724.1, 222464.3, 1686…
## $ `2016` <dbl> 157806.1, 33976.4, 243679.6, 94866.1, 1665899.0, 233801.6, 1716…
## $ `2017` <dbl> 163523.4, 34926.8, 259029.1, 98052.5, 1752123.2, 247983.2, 1770…
## $ `2018` <dbl> 169460.2, 36077.1, 273816.1, 102359.8, 1856117.3, 261807.9, 183…
## $ `2019` <dbl> 176843.5, 36844.1, 286488.5, 104313.8, 1938018.7, 275707.0, 186…
## $ `2020` <dbl> 175481.9, 35006.0, 291542.5, 105433.5, 1885028.5, 273942.3, 184…
## $ `2021` <dbl> 197820.2, 39632.5, 334350.1, 118796.2, 2146539.4, 314541.5, 204…
## $ `2022` <dbl> 215104.6, 43412.8, 368866.6, 128662.4, 2352361.6, 346723.2, 219…
# Disposable Income data by Region
allRegions_DisposableIncome <- Disposable_Income |>
                               inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data 
                               select(REGION,'2000':'2022') |>
                               group_by(REGION) |>
                               summarise_all(mean) |>
                               pivot_longer(cols = "2000":"2022",
                               names_to = c("Year_Column"),
                               values_to = "Disposable_Income")
allRegions_DisposableIncome$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_DisposableIncome$Year_Column)
allRegions_DisposableIncome <- allRegions_DisposableIncome |> select(-Year_Column)
glimpse(allRegions_DisposableIncome)
## Rows: 92
## Columns: 3
## $ REGION            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Disposable_Income <dbl> 177084.0, 184665.9, 192364.1, 198596.9, 208179.7, 21…
## $ Year              <chr> "2000", "2001", "2002", "2003", "2004", "2005", "200…
# Consumer Spending data by Region
allRegions_ConsumerSpending <- Consumer_Spending |>
                               inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data 
                               select(REGION,'2000':'2022') |>
                               group_by(REGION) |>
                               summarise_all(mean) |>
                               pivot_longer(cols = "2000":"2022",
                               names_to = c("Year_Column"),
                               values_to = "Consumer_Spending")
allRegions_ConsumerSpending$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_ConsumerSpending$Year_Column)
allRegions_ConsumerSpending <- allRegions_ConsumerSpending |> select(-Year_Column)
glimpse(allRegions_ConsumerSpending)
## Rows: 92
## Columns: 3
## $ REGION            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Consumer_Spending <dbl> 156018.2, 163250.9, 169863.4, 179593.7, 190131.8, 20…
## $ Year              <chr> "2000", "2001", "2002", "2003", "2004", "2005", "200…
# Convert to CSV
#write.csv(allRegions_DisposableIncome, 'allRegions_DisposableIncome.csv', row.names=FALSE)
#write.csv(allRegions_ConsumerSpending, 'allRegions_ConsumerSpending.csv', row.names=FALSE)

Tidying Sales of Single-Family Homes data

# Sales of Single-Family Homes - Northeast Region 
Northeast_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in Northeast Census Region, Thousands, Seasonally Adjusted Annual Rate .csv', show_col_types = FALSE) |>
                     rename('NorthEast'= HSN1FNE) |>
                     rename('Year'= DATE) 
Northeast_SalesSF$Year <- format(Northeast_SalesSF$Year, "%Y")
Northeast_SalesSF$NorthEast <- round(Northeast_SalesSF$NorthEast * 1000)
Northeast_Sales_new <- Northeast_SalesSF |>
                       pivot_wider(names_from = Year, values_from = NorthEast) |>
                       cbind(REGION = '1') |>
                       select(REGION, '2000':'2023')
glimpse(Northeast_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "1"
## $ `2000` <dbl> 71250
## $ `2001` <dbl> 65417
## $ `2002` <dbl> 64750
## $ `2003` <dbl> 80083
## $ `2004` <dbl> 82167
## $ `2005` <dbl> 81417
## $ `2006` <dbl> 63583
## $ `2007` <dbl> 64083
## $ `2008` <dbl> 35333
## $ `2009` <dbl> 31417
## $ `2010` <dbl> 30667
## $ `2011` <dbl> 21167
## $ `2012` <dbl> 28833
## $ `2013` <dbl> 30583
## $ `2014` <dbl> 27750
## $ `2015` <dbl> 24750
## $ `2016` <dbl> 32250
## $ `2017` <dbl> 39833
## $ `2018` <dbl> 32333
## $ `2019` <dbl> 30333
## $ `2020` <dbl> 37667
## $ `2021` <dbl> 35833
## $ `2022` <dbl> 32583
## $ `2023` <dbl> 33583
# Sales of Single-Family Homes - Midwest Region 
Midwest_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in Midwest Census Region, Thousands, Seasonally Adjusted Annual Rate.csv', show_col_types = FALSE) |>
                   rename('MidWest'= HSN1FMW) |>
                   rename('Year'= DATE) 
Midwest_SalesSF$Year <- format(Midwest_SalesSF$Year, "%Y")
Midwest_SalesSF$MidWest <- round(Midwest_SalesSF$MidWest * 1000)
Midwest_Sales_new <- Midwest_SalesSF |> 
                     pivot_wider(names_from = Year, values_from = MidWest) |>
                     cbind(REGION = '2') |>
                     select(REGION, '2000':'2023')
glimpse(Midwest_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "2"
## $ `2000` <dbl> 155833
## $ `2001` <dbl> 163500
## $ `2002` <dbl> 188417
## $ `2003` <dbl> 188750
## $ `2004` <dbl> 211000
## $ `2005` <dbl> 203167
## $ `2006` <dbl> 161333
## $ `2007` <dbl> 117833
## $ `2008` <dbl> 69250
## $ `2009` <dbl> 54167
## $ `2010` <dbl> 44500
## $ `2011` <dbl> 45167
## $ `2012` <dbl> 47250
## $ `2013` <dbl> 60833
## $ `2014` <dbl> 58250
## $ `2015` <dbl> 61167
## $ `2016` <dbl> 69417
## $ `2017` <dbl> 71833
## $ `2018` <dbl> 75167
## $ `2019` <dbl> 72417
## $ `2020` <dbl> 93250
## $ `2021` <dbl> 84250
## $ `2022` <dbl> 65250
## $ `2023` <dbl> 68167
# Sales of Single-Family Homes - South Region 
South_SalesSF <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in South Census Region, Thousands, Seasonally Adjusted Annual Rate .csv", show_col_types = FALSE) |>
                 rename('South'= HSN1FS) |>
                 rename('Year'= DATE) 
South_SalesSF$Year <- format(South_SalesSF$Year, "%Y")
South_SalesSF$South <- round(South_SalesSF$South * 1000)
South_Sales_new <- South_SalesSF |>
                   pivot_wider(names_from = Year, values_from = South) |>
                   cbind(REGION = '3') |>
                   select(REGION, '2000':'2023')
glimpse(South_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "3"
## $ `2000` <dbl> 407083
## $ `2001` <dbl> 439000
## $ `2002` <dbl> 450417
## $ `2003` <dbl> 513417
## $ `2004` <dbl> 561000
## $ `2005` <dbl> 638167
## $ `2006` <dbl> 558917
## $ `2007` <dbl> 408750
## $ `2008` <dbl> 264750
## $ `2009` <dbl> 201667
## $ `2010` <dbl> 172500
## $ `2011` <dbl> 168167
## $ `2012` <dbl> 195417
## $ `2013` <dbl> 233083
## $ `2014` <dbl> 244583
## $ `2015` <dbl> 286083
## $ `2016` <dbl> 316667
## $ `2017` <dbl> 339583
## $ `2018` <dbl> 347333
## $ `2019` <dbl> 400417
## $ `2020` <dbl> 481750
## $ `2021` <dbl> 452167
## $ `2022` <dbl> 391000
## $ `2023` <dbl> 411333
# Sales of Single-Family Homes - West Region 
West_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in West Census Region, Thousands, Seasonally Adjusted Annual Rate .csv', show_col_types = FALSE) |>
                rename('West'= HSN1FW) |>
                rename('Year'= DATE) 
West_SalesSF$Year <- format(West_SalesSF$Year, "%Y")
West_SalesSF$West <- round(West_SalesSF$West * 1000)
West_Sales_new <- West_SalesSF |>
                  pivot_wider(names_from = Year, values_from = West) |>
                  cbind(REGION = '4') |>
                  select(REGION, '2000':'2023')
glimpse(West_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "4"
## $ `2000` <dbl> 246083
## $ `2001` <dbl> 239333
## $ `2002` <dbl> 272583
## $ `2003` <dbl> 308417
## $ `2004` <dbl> 346667
## $ `2005` <dbl> 356167
## $ `2006` <dbl> 265500
## $ `2007` <dbl> 178000
## $ `2008` <dbl> 112833
## $ `2009` <dbl> 87000
## $ `2010` <dbl> 73583
## $ `2011` <dbl> 71917
## $ `2012` <dbl> 96583
## $ `2013` <dbl> 105333
## $ `2014` <dbl> 109667
## $ `2015` <dbl> 130583
## $ `2016` <dbl> 142167
## $ `2017` <dbl> 163417
## $ `2018` <dbl> 159167
## $ `2019` <dbl> 182000
## $ `2020` <dbl> 220083
## $ `2021` <dbl> 197000
## $ `2022` <dbl> 148083
## $ `2023` <dbl> 153417
# Sales of Single-Family Homes - All Regions 
allRegions_SalesSFH <- bind_rows(Northeast_Sales_new,Midwest_Sales_new,South_Sales_new,West_Sales_new) |>
                       pivot_longer(cols = "2000":"2023",
                       names_to = c("Year_Column"),
                       values_to = "SFH_Sales")
allRegions_SalesSFH$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", allRegions_SalesSFH$Year_Column)
allRegions_SalesSFH <- allRegions_SalesSFH |> select(-Year_Column)
glimpse(allRegions_SalesSFH)
## Rows: 96
## Columns: 3
## $ REGION    <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ SFH_Sales <dbl> 71250, 65417, 64750, 80083, 82167, 81417, 63583, 64083, 3533…
## $ Year      <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "200…
# Convert to CSV
#write.csv(allRegions_SalesSFH, 'allRegions_SalesSFH.csv', row.names=FALSE)

Question 1

Is there a correlation between population growth and changes in home value?

allRegions_PopEstimates <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_PopEstimates.csv", show_col_types = FALSE)
allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)


# Joining All Regions Population Estimates data and All Regions Zillow Average Home Prices data 
Zillow_PopulationEstimates <- inner_join(allRegions_Zillow,allRegions_PopEstimates, join_by(REGION,Year))
glimpse(Zillow_PopulationEstimates)
## Rows: 96
## Columns: 4
## $ REGION             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year               <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Population         <dbl> 5962922, 5990614, 6015991, 6037161, 6047059, 605013…
# Visualization of "Changes In Home Prices and Population Estimates from 2000-2023" 
ggplot(Zillow_PopulationEstimates, aes(x=Population, y=ZillowAvgHomePrice, color=Year)) +
  geom_point() +
  scale_y_continuous(labels = scales::dollar) +
  scale_x_continuous(labels = scales::comma) +
  facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
  theme_bw() +
  theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Zillow Average Home Price") +
  xlab("Population Estimates") +
  ggtitle("Changes In Home Price And Population Estimates (2000-2023)") +
  scale_color_gradient(low="blue", high="red") +
  theme(legend.key.width = unit(1.5,'cm'))

# Correlation test between Population Estimates and Zillow Average Home Prices
cor.test(Zillow_PopulationEstimates$ZillowAvgHomePrice,Zillow_PopulationEstimates$Population)
## 
##  Pearson's product-moment correlation
## 
## data:  Zillow_PopulationEstimates$ZillowAvgHomePrice and Zillow_PopulationEstimates$Population
## t = 2.1823, df = 94, p-value = 0.03158
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.01998641 0.40236423
## sample estimates:
##       cor 
## 0.2195924
# All States Population Estimates 
All_PopEstimates_longer <- pop_estimate |>
          pivot_longer(cols = "POPESTIMATE2000":"POPESTIMATE2023",
          names_to = c("YearlyPopEstimate"),
          values_to = "Population")
All_PopEstimates_longer$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", All_PopEstimates_longer$YearlyPopEstimate)
All_PopEstimates_longer <- All_PopEstimates_longer |> select(-YearlyPopEstimate)
glimpse(All_PopEstimates_longer)
## Rows: 1,224
## Columns: 4
## $ REGION     <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ STATE      <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Ala…
## $ Population <int> 4452173, 4467634, 4480089, 4503491, 4530729, 4569805, 46289…
## $ Year       <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "20…
# All States Zillow Average Home Prices 
All_Zillow_Longer <- Zillow_new |>
          pivot_longer(cols = "2000_Yearly_Avg":"2023_Yearly_Avg",
                       names_to = c("Year_Column"),
                       values_to = "ZillowAvgHomePrice")
All_Zillow_Longer$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", All_Zillow_Longer$Year_Column)
All_Zillow_Longer <- All_Zillow_Longer |> select(-Year_Column)
glimpse(All_Zillow_Longer)
## Rows: 1,224
## Columns: 4
## $ REGION             <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ STATE              <chr> "California", "California", "California", "Californ…
## $ ZillowAvgHomePrice <dbl> 200193.0, 228074.6, 256343.3, 298431.0, 362059.5, 4…
## $ Year               <chr> "2000", "2001", "2002", "2003", "2004", "2005", "20…
# Region 3 - South 
    All_South_Population <- All_PopEstimates_longer |> filter(REGION == 3)
    # South Population Visual
    ggplot(All_South_Population, aes(x=Year,y=Population)) +
      geom_point(aes(color=STATE)) +
      scale_y_continuous(labels = scales::comma) +
      scale_x_discrete(labels = NULL) +
      facet_wrap(~STATE) +
      theme(legend.position = 'none') +
      ggtitle("Changes In South Population Estimates (2000-2023)")

    # South Population Percent Change 
    All_SouthPop_PerChg_2000to2023 <- All_South_Population |> 
          filter(Year %in% c(2000,2023)) |> 
          group_by(STATE) |>
          summarise(Percent_Change = (Population[Year == 2023] / Population[Year == 2000] - 1) * 100)
    All_SouthPop_PerChg_2000to2023
## # A tibble: 17 × 2
##    STATE                Percent_Change
##    <chr>                         <dbl>
##  1 Alabama                       14.7 
##  2 Arkansas                      14.5 
##  3 Delaware                      31.2 
##  4 District of Columbia          18.7 
##  5 Florida                       40.9 
##  6 Georgia                       34.1 
##  7 Kentucky                      11.8 
##  8 Louisiana                      2.28
##  9 Maryland                      16.4 
## 10 Mississippi                    3.21
## 11 North Carolina                34.1 
## 12 Oklahoma                      17.4 
## 13 South Carolina                33.5 
## 14 Tennessee                     24.9 
## 15 Texas                         45.6 
## 16 Virginia                      22.7 
## 17 West Virginia                 -2.04
    All_South_Zillow <- All_Zillow_Longer |> filter(REGION == 3)
    # South Home Price Visual 
    ggplot(All_South_Zillow, aes(x=Year,y=ZillowAvgHomePrice)) +
      geom_point(aes(color=STATE)) +
      scale_y_continuous(labels = scales::dollar) +
      scale_x_discrete(labels = NULL) +
      facet_wrap(~STATE) +
      theme(legend.position = 'none') +
      ylab("Zillow Average Home Price") +
      ggtitle("Changes In South Home Values (2000-2023)")

    # South Home Price Percent Change 
    All_SouthZillow_PerChg_2000to2023 <- All_South_Zillow |> 
          filter(Year %in% c(2000,2023)) |> 
          group_by(STATE) |>
          summarise(Percent_Change = (ZillowAvgHomePrice[Year == 2023] / ZillowAvgHomePrice[Year == 2000] - 1) * 100)
    All_SouthZillow_PerChg_2000to2023
## # A tibble: 17 × 2
##    STATE                Percent_Change
##    <chr>                         <dbl>
##  1 Alabama                       122. 
##  2 Arkansas                      128. 
##  3 Delaware                      147. 
##  4 District of Columbia          267. 
##  5 Florida                       254. 
##  6 Georgia                       150. 
##  7 Kentucky                      129. 
##  8 Louisiana                      93.2
##  9 Maryland                      162. 
## 10 Mississippi                   102. 
## 11 North Carolina                144. 
## 12 Oklahoma                      139. 
## 13 South Carolina                149. 
## 14 Tennessee                     177. 
## 15 Texas                         169. 
## 16 Virginia                      192. 
## 17 West Virginia                 111.
# Region 4 - West
    All_West_Population <- All_PopEstimates_longer |> filter(REGION == 4)
    # West Population Visual
    ggplot(All_West_Population, aes(x=Year,y=Population)) +
      geom_point(aes(color=STATE)) +
      scale_y_continuous(labels = scales::comma) +
      scale_x_discrete(labels = NULL) +
      facet_wrap(~STATE) +
      theme(legend.position = 'none') +
      ggtitle("Changes In West Population Estimates (2000-2023)")

    # West Population Percent Change 
    All_WestPop_PerChg_2000to2023 <- All_West_Population |> 
          filter(Year %in% c(2000,2023)) |> 
          group_by(STATE) |>
          summarise(Percent_Change = (Population[Year == 2023] / Population[Year == 2000] - 1) * 100)
    All_WestPop_PerChg_2000to2023
## # A tibble: 13 × 2
##    STATE      Percent_Change
##    <chr>               <dbl>
##  1 Alaska               16.8
##  2 Arizona              44.0
##  3 California           14.6
##  4 Colorado             35.8
##  5 Hawaii               18.3
##  6 Idaho                51.2
##  7 Montana              25.3
##  8 Nevada               58.2
##  9 New Mexico           16.1
## 10 Oregon               23.4
## 11 Utah                 52.3
## 12 Washington           32.2
## 13 Wyoming              18.2
    All_West_Zillow <- All_Zillow_Longer |> filter(REGION == 4)
    # West Home Price Visual 
    ggplot(All_West_Zillow, aes(x=Year,y=ZillowAvgHomePrice)) +
      geom_point(aes(color=STATE)) +
      scale_y_continuous(labels = scales::dollar) +
      scale_x_discrete(labels = NULL) +
      facet_wrap(~STATE) +
      theme(legend.position = 'none') +
      ylab("Zillow Average Home Price") +
      ggtitle("Changes In West Home Values (2000-2023)") 

    # West Home Price Percent Change 
    All_WestZillow_PerChg_2000to2023 <- All_West_Zillow |> 
          filter(Year %in% c(2000,2023)) |> 
          group_by(STATE) |>
          summarise(Percent_Change = (ZillowAvgHomePrice[Year == 2023] / ZillowAvgHomePrice[Year == 2000] - 1) * 100)
    All_WestZillow_PerChg_2000to2023
## # A tibble: 13 × 2
##    STATE      Percent_Change
##    <chr>               <dbl>
##  1 Alaska               162.
##  2 Arizona              205.
##  3 California           269.
##  4 Colorado             192.
##  5 Hawaii               319.
##  6 Idaho                261.
##  7 Montana              Inf 
##  8 Nevada               168.
##  9 New Mexico           Inf 
## 10 Oregon               217.
## 11 Utah                 195.
## 12 Washington           219.
## 13 Wyoming              Inf

How does demand fluctuate for single-family homes?

allRegions_SalesSFH <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_SalesSFH.csv", show_col_types = FALSE)


# Visualization of Sales Of Single-Family Homes 
ggplot(allRegions_SalesSFH, aes(x=Year,y=SFH_Sales,color=REGION)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma) +
  facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
  ylab("Single-Family House Sales") +
  xlab("Years") +
  ggtitle("Changes In Single-Family House Sales (2000-2023)") +
  theme_gray() +
  theme(legend.position = 'none') 

Question 2

Does changes in personal income and consumer spending correlate with changes in home value?

Disposable Income and Home Value

allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
allRegions_DisposableIncome <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_DisposableIncome.csv", show_col_types = FALSE)


# Joining All Regions Zillow Average Home Price data and All Regions Disposable Personal Income data
Zillow_DisposableIncome=inner_join(allRegions_Zillow,allRegions_DisposableIncome,join_by('REGION','Year'))
glimpse(Zillow_DisposableIncome)
## Rows: 92
## Columns: 4
## $ REGION             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year               <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Disposable_Income  <dbl> 177084.0, 184665.9, 192364.1, 198596.9, 208179.7, 2…
# Visualization of Changes In Home Price And Disposable Income
ggplot(Zillow_DisposableIncome,aes(x=Disposable_Income,y=ZillowAvgHomePrice,color=Year))+
  geom_point() + 
  scale_y_continuous(labels = scales::dollar) +
  scale_x_continuous(labels = scales::dollar) +
  facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
  theme_bw() +
  theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Zillow Average Home Price") +
  xlab("Disposable Income") +
  ggtitle("Changes In Home Price And Disposable Income (2000-2022)") +
  scale_color_gradient(low="blue", high="red") +
  theme(legend.key.width = unit(1.5,'cm'))

# Correlation test between Disposable Income and Zillow Average Home Prices
cor.test(Zillow_DisposableIncome$Disposable_Income,Zillow_DisposableIncome$ZillowAvgHomePrice)
## 
##  Pearson's product-moment correlation
## 
## data:  Zillow_DisposableIncome$Disposable_Income and Zillow_DisposableIncome$ZillowAvgHomePrice
## t = 9.7777, df = 90, p-value = 8.275e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6012713 0.8042912
## sample estimates:
##      cor 
## 0.717703
# Permutation test to further test the correlation
    # Null Distribution - intended to have no relationship by design 
    replicate(5000, {
      Zillow_DisposableIncome |>
        mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
        summarize(corr=cor(ZillowAvgHomePrice,Disposable_Income)) |>
        pull(corr)
    }) -> T_corr
    hist(T_corr)

    # Observed (Same number from the correlation test above)
    Zillow_DisposableIncome |>
    #mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
    summarize(corr=cor(ZillowAvgHomePrice,Disposable_Income)) |>
    pull(corr)
## [1] 0.717703
# When the observed is very inconsistent with the null distribution, we can reject the null distribution. The observed data is so unlikely to have occurred if the null were true. 

Consumer Spending and Home Value

allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
allRegions_ConsumerSpending <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_ConsumerSpending.csv", show_col_types = FALSE)


# Joining All Regions Zillow Average Home Price data and All Regions Consumer Spending data
Zillow_ConsumerSpending <- inner_join(allRegions_Zillow,allRegions_ConsumerSpending, join_by('REGION','Year'))
glimpse(Zillow_ConsumerSpending)
## Rows: 92
## Columns: 4
## $ REGION             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year               <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Consumer_Spending  <dbl> 156018.2, 163250.9, 169863.4, 179593.7, 190131.8, 2…
# Visualization of Changes In Home Price And Consumer Spending 
ggplot(Zillow_ConsumerSpending, aes(y=ZillowAvgHomePrice,x=Consumer_Spending,color=Year)) + 
  geom_point() + 
  scale_y_continuous(labels = scales::dollar) +
  scale_x_continuous(labels = scales::dollar) +
  facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
  theme_bw() +
  theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Zillow Average Home Price") +
  xlab("Consumer Spending") +
  ggtitle("Changes In Home Price And Consumer Spending (2000-2022)") +
  scale_color_gradient(low="blue", high="red") +
  theme(legend.key.width = unit(1.5,'cm')) 

# Correlation test between Consumer Spending and Zillow Average Home Prices
cor.test(Zillow_ConsumerSpending$ZillowAvgHomePrice,Zillow_ConsumerSpending$Consumer_Spending)
## 
##  Pearson's product-moment correlation
## 
## data:  Zillow_ConsumerSpending$ZillowAvgHomePrice and Zillow_ConsumerSpending$Consumer_Spending
## t = 10.106, df = 90, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6163133 0.8125708
## sample estimates:
##       cor 
## 0.7290958
# Permutation test to further test the correlation
    # Null Distribution - intended to have no relationship by design 
    replicate(5000, {
      Zillow_ConsumerSpending |>
        mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
        summarize(corr=cor(ZillowAvgHomePrice,Consumer_Spending)) |>
        pull(corr)
    }) -> T_corr
    hist(T_corr)

    # Observed (Same number from the correlation test above)
    Zillow_ConsumerSpending |>
    #mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
    summarize(corr=cor(ZillowAvgHomePrice,Consumer_Spending)) |>
    pull(corr)
## [1] 0.7290958
# When the observed is very inconsistent with the null distribution, we can reject the null distribution. The observed data is so unlikely to have occurred if the null were true. 

Question 3

How have major events affected population dynamics, housing markets, and economic indicators within affected regions?

2008 Crisis

consumerSpending <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_ConsumerSpending.csv", show_col_types = FALSE)
disposableIncome <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_DisposableIncome.csv", show_col_types = FALSE)
popEstimates <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_PopEstimates.csv", show_col_types = FALSE)
salesSFH <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_SalesSFH.csv", show_col_types = FALSE)
zillowData <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
mortgage15 <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /MORTGAGE15US.csv", show_col_types = FALSE)
mortgage30 <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /MORTGAGE30US.csv", show_col_types = FALSE)
unemployment<- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /UNRATE.csv", show_col_types = FALSE)


# Merging all data-sets into one comprehensive data-frame, with emphasis on years 2005 to 2013
economic_data <- reduce(list(consumerSpending, disposableIncome, popEstimates, salesSFH, zillowData), full_join, by = c("REGION", "Year"))
economic_data_crisis <- filter(economic_data, Year >= 2005 & Year <= 2013)


# Apply region mapping
region_mapping <- c('1' = 'Northeast', '2' = 'Midwest', '3' = 'South', '4' = 'West')
economic_data_crisis$REGION <- factor(economic_data_crisis$REGION, levels = names(region_mapping), labels = region_mapping)
glimpse(economic_data_crisis)
## Rows: 36
## Columns: 7
## $ REGION             <fct> Northeast, Northeast, Northeast, Northeast, Northea…
## $ Consumer_Spending  <dbl> 201264.7, 211507.9, 220690.2, 227822.0, 225554.8, 2…
## $ Year               <dbl> 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 201…
## $ Disposable_Income  <dbl> 213357.4, 226112.3, 238219.6, 247891.2, 252763.9, 2…
## $ Population         <dbl> 6050137, 6058073, 6072596, 6097325, 6125900, 615122…
## $ SFH_Sales          <dbl> 81417, 63583, 64083, 35333, 31417, 30667, 21167, 28…
## $ ZillowAvgHomePrice <dbl> 251367.6, 267143.3, 265116.4, 254258.0, 236936.7, 2…
# Home Price & Population Trends
# Create separate data frames for home prices and population
home_prices <- economic_data_crisis[c("REGION", "Year", "ZillowAvgHomePrice")]
population <- economic_data_crisis[c("REGION", "Year", "Population")]


# Plot home price trends during 2008 crisis
home_price_plot <- ggplot(home_prices, aes(x = Year, y = ZillowAvgHomePrice, color = factor(REGION))) +
  geom_line() +
  labs(title = "Home Price Trends (2005-2013)", y = "Home Price", color = "Region") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)  
home_price_plot

# Home price percent change 2008 crisis
home_price_percentChange <- home_prices|>
  group_by(REGION) |>
  mutate(Previous_ZillowAvgHomePrice = lag(ZillowAvgHomePrice, 1)) |>
  mutate(Percent_Change=((ZillowAvgHomePrice-Previous_ZillowAvgHomePrice)/Previous_ZillowAvgHomePrice)*100) |>
  select(-ZillowAvgHomePrice,-Previous_ZillowAvgHomePrice)
print(home_price_percentChange, n=36)
## # A tibble: 36 × 3
## # Groups:   REGION [4]
##    REGION     Year Percent_Change
##    <fct>     <dbl>          <dbl>
##  1 Northeast  2005        NA     
##  2 Northeast  2006         6.28  
##  3 Northeast  2007        -0.759 
##  4 Northeast  2008        -4.10  
##  5 Northeast  2009        -6.81  
##  6 Northeast  2010        -2.01  
##  7 Northeast  2011        -3.63  
##  8 Northeast  2012        -2.30  
##  9 Northeast  2013         2.50  
## 10 Midwest    2005        NA     
## 11 Midwest    2006         3.69  
## 12 Midwest    2007         0.706 
## 13 Midwest    2008        -2.84  
## 14 Midwest    2009         4.64  
## 15 Midwest    2010        -2.15  
## 16 Midwest    2011        -4.22  
## 17 Midwest    2012        -0.846 
## 18 Midwest    2013         6.27  
## 19 South      2005        NA     
## 20 South      2006         9.90  
## 21 South      2007         2.25  
## 22 South      2008        -3.73  
## 23 South      2009        -6.17  
## 24 South      2010        -2.84  
## 25 South      2011        -4.01  
## 26 South      2012        -0.490 
## 27 South      2013         4.97  
## 28 West       2005        NA     
## 29 West       2006        13.9   
## 30 West       2007         2.95  
## 31 West       2008        -6.12  
## 32 West       2009       -10.7   
## 33 West       2010        -4.35  
## 34 West       2011        -5.20  
## 35 West       2012        -0.0808
## 36 West       2013         9.16
# Plot population trends during 2008 crisis
population_plot <- ggplot(population, aes(x = Year, y = Population, color = factor(REGION))) +
  geom_line() +
  labs(title = "Population Trends (2005-2013)", y = "Population", color = "Region") +
  theme_minimal() +
  scale_y_continuous(labels = comma)  
population_plot

# Population percent change 2008 crisis
population_percentChange <- population |> 
  group_by(REGION) |> 
  mutate(Previous_pop = lag(Population, 1)) |>
  mutate(Percent_Change=((Population-Previous_pop)/Previous_pop)*100) |>
  select(-Population,-Previous_pop)
print(population_percentChange, n=36)
## # A tibble: 36 × 3
## # Groups:   REGION [4]
##    REGION     Year Percent_Change
##    <fct>     <dbl>          <dbl>
##  1 Northeast  2005         NA    
##  2 Northeast  2006          0.131
##  3 Northeast  2007          0.240
##  4 Northeast  2008          0.407
##  5 Northeast  2009          0.469
##  6 Northeast  2010          0.413
##  7 Northeast  2011          0.439
##  8 Northeast  2012          0.308
##  9 Northeast  2013          0.227
## 10 Midwest    2005         NA    
## 11 Midwest    2006          0.421
## 12 Midwest    2007          0.402
## 13 Midwest    2008          0.347
## 14 Midwest    2009          0.337
## 15 Midwest    2010          0.341
## 16 Midwest    2011          0.272
## 17 Midwest    2012          0.266
## 18 Midwest    2013          0.332
## 19 South      2005         NA    
## 20 South      2006          1.49 
## 21 South      2007          1.48 
## 22 South      2008          1.35 
## 23 South      2009          1.22 
## 24 South      2010          1.16 
## 25 South      2011          0.993
## 26 South      2012          1.06 
## 27 South      2013          0.958
## 28 West       2005         NA    
## 29 West       2006          1.35 
## 30 West       2007          1.23 
## 31 West       2008          1.31 
## 32 West       2009          1.18 
## 33 West       2010          1.13 
## 34 West       2011          0.889
## 35 West       2012          0.947
## 36 West       2013          0.938
# Arrange plots side by side
multi_panel_plot <- plot_grid(home_price_plot, population_plot, ncol = 2)
multi_panel_plot

# Economic Indicators Over Time (2005-2013)
ggplot(economic_data_crisis, aes(x = Year)) +
  geom_line(aes(y = Consumer_Spending, color = "Consumer Spending (USD)")) +
  geom_line(aes(y = Disposable_Income, color = "Disposable Income (USD)")) +
  geom_line(aes(y = ZillowAvgHomePrice, color = "Home Prices (USD)")) +
  facet_wrap(~REGION) +  
  labs(title = "Economic Indicators Over Time (2005-2013)", y = "Value") +
  scale_color_manual(values = c("Consumer Spending (USD)" = "blue", 
                                "Disposable Income (USD)" = "green", 
                                "Home Prices (USD)" = "red")) +
  theme_minimal() +
  guides(color = guide_legend(title = "Indicators")) +
  scale_y_continuous(labels = comma)

# Mortgage rates from 2000-2010
# Extracting year from the DATE column for merging
mortgage15$Year <- format(as.Date(mortgage15$DATE), "%Y")
mortgage30$Year <- format(as.Date(mortgage30$DATE), "%Y")

# Convert Year to numeric for consistency
mortgage15$Year <- as.numeric(mortgage15$Year)
mortgage30$Year <- as.numeric(mortgage30$Year)

# Aggregating average yearly mortgage rates before merging
avg_mortgage15 <- mortgage15 %>%
  group_by(Year) %>%
  summarise(AvgMortgage15Rate = mean(MORTGAGE15US))

avg_mortgage30 <- mortgage30 %>%
  group_by(Year) %>%
  summarise(AvgMortgage30Rate = mean(MORTGAGE30US))

# Merging mortgage rate data with main economic data
economic_data_crisis <- economic_data_crisis %>%
  left_join(avg_mortgage15, by = "Year") %>%
  left_join(avg_mortgage30, by = "Year")

# Filtering mortgage data from 2000 to 2010
mortgage15_filtered <- filter(mortgage15, Year >= 2000 & Year <= 2010)
mortgage30_filtered <- filter(mortgage30, Year >= 2000 & Year <= 2010)

# Plotting the mortgage rate trends from 2000 to 2010
mortgage_rate_plot <- ggplot() +
  geom_line(data = mortgage15_filtered, aes(x = DATE, y = MORTGAGE15US, color = "15-Year Mortgage Rate"), linewidth = 1) +
  geom_line(data = mortgage30_filtered, aes(x = DATE, y = MORTGAGE30US, color = "30-Year Mortgage Rate"), linewidth = 1) +
  labs(title = "Mortgage Rates (2000-2010)",
       x = "Year",
       y = "Mortgage Rate (%)",
       color = "Mortgage Rate Type") +
  scale_color_manual(values = c("15-Year Mortgage Rate" = "steelblue", "30-Year Mortgage Rate" = "darkorange")) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
mortgage_rate_plot

# Unemployment visualization 
ggplot(data = unemployment, aes(x = DATE, y = UNRATE)) +
  geom_line() + # This adds a line graph
  labs(title = "Unemployment Rate Over Time",
       x = "Date",
       y = "Unemployment Rate (%)") +
  theme_minimal()

COVID Crisis

# Focus on the years from 2019 to 2022
Covid_crisis <- filter(economic_data, Year >= 2019 & Year <= 2022)


# Apply region mapping
region_mapping <- c('1' = 'Northeast', '2' = 'Midwest', '3' = 'South', '4' = 'West')
Covid_crisis$REGION <- factor(Covid_crisis$REGION, levels = names(region_mapping), labels = region_mapping)
glimpse(Covid_crisis)
## Rows: 16
## Columns: 7
## $ REGION             <fct> Northeast, Northeast, Northeast, Northeast, Midwest…
## $ Consumer_Spending  <dbl> 315365.2, 305907.9, 340473.0, 369889.4, 239730.7, 2…
## $ Year               <dbl> 2019, 2020, 2021, 2022, 2019, 2020, 2021, 2022, 201…
## $ Disposable_Income  <dbl> 354637.6, 378656.2, 397053.7, 388295.1, 265311.9, 2…
## $ Population         <dbl> 6220311, 6381164, 6360380, 6336316, 5694084, 574748…
## $ SFH_Sales          <dbl> 30333, 37667, 35833, 32583, 72417, 93250, 84250, 65…
## $ ZillowAvgHomePrice <dbl> 286114.9, 303382.1, 349039.7, 389775.3, 179952.1, 1…
# Population trends during Covid 
population_covid <- Covid_crisis[c("REGION", "Year", "Population")]

# Plot population trends Covid
population_plot_covid <- ggplot(population_covid, aes(x = Year, y = Population, color = factor(REGION))) +
  geom_line() +
  labs(title = "Population Trends (COVID)", y = "Population", color = "Region") +
  theme_minimal() +
  scale_y_continuous(labels = comma)  
population_plot_covid

# Population Percent Change during Covid 
population_percentChange_covid <- population_covid |> 
  group_by(REGION) |> 
  mutate(Previous_pop = lag(Population, 1)) |>
  mutate(Percent_Change=((Population-Previous_pop)/Previous_pop)*100) |>
  select(-Population,-Previous_pop)
population_percentChange_covid
## # A tibble: 16 × 3
## # Groups:   REGION [4]
##    REGION     Year Percent_Change
##    <fct>     <dbl>          <dbl>
##  1 Northeast  2019        NA     
##  2 Northeast  2020         2.59  
##  3 Northeast  2021        -0.326 
##  4 Northeast  2022        -0.378 
##  5 Midwest    2019        NA     
##  6 Midwest    2020         0.938 
##  7 Midwest    2021        -0.173 
##  8 Midwest    2022        -0.0976
##  9 South      2019        NA     
## 10 South      2020         0.705 
## 11 South      2021         0.702 
## 12 South      2022         1.06  
## 13 West       2019        NA     
## 14 West       2020         0.401 
## 15 West       2021        -0.0755
## 16 West       2022         0.200
# US shape file on R
us_map <- us_map(regions = "states") |> select(full,geom)


# Merging All States Zillow data with US shape file 
Zillow_2019_2023 <- All_Zillow_Longer |> 
  group_by(STATE) |> 
  filter(Year %in% c(2019:2023)) |>
  inner_join(us_map, join_by('STATE'=='full'))


# Animation of Changes in Zillow Average Home Prices during Covid 
ggplot(Zillow_2019_2023, aes(fill=ZillowAvgHomePrice, geometry = geom)) +
  geom_sf() +
  transition_time(as.integer(Year)) +
  theme(
    axis.title = element_blank(),  
    axis.text = element_blank(),   
    axis.ticks = element_blank(),  
    panel.background = element_blank(),  
    panel.border = element_blank(),  
    panel.grid.major = element_blank(),  
    panel.grid.minor = element_blank(),
    legend.position = "left")+ 
  scale_fill_continuous() +
  scale_x_log10() +
  scale_y_log10() +
  scale_fill_distiller(type="div",
                       palette="RdGy",
                       name="Zillow Price Scale",
                       direction=-1,
                       labels=scales::dollar_format())  +
  guides(color="none") +
  labs(fill = "Zillow Price Scale",
       caption = "Source: Zillow Home Value Index: zillow.com/data/") +
  ggtitle("{frame_time} Zillow Average Home Price Change")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

# Population Pyramid of Single-Family Houses Sales 
ggplot(allRegions_SalesSFH, aes(x=Year, y=SFH_Sales, fill=REGION)) +
  geom_bar(stat = "identity") +
  coord_flip() +  
  facet_wrap(~REGION, scales = "free_y", labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +  
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Change in Sales of Single-Family Homes: 2000 - 2023", x = "Year", y = "Number of Sales") +
  theme_bw()+
  theme(legend.position = "none") +
  scale_color_date() 

# Trend analysis over Covid years
ggplot(Covid_crisis, aes(x = Year)) +
  geom_line(aes(y = Consumer_Spending, color = "Consumer Spending (USD)")) +
  geom_line(aes(y = Disposable_Income, color = "Disposable Income (USD)")) +
  geom_line(aes(y = ZillowAvgHomePrice, color = "Home Prices (USD)")) +
  facet_wrap(~REGION) +
  labs(title = "Economic Indicators Over Time (COVID)", y = "Value") +
  scale_color_manual(values = c("Consumer Spending (USD)" = "blue", 
                                "Disposable Income (USD)" = "green", 
                                "Home Prices (USD)" = "red")) +
  theme_minimal() +
  guides(color = guide_legend(title = "Indicators")) +
  scale_y_continuous(labels = comma)